********************************************************************************
*                                                                              *
*                      CREATE BALANCED WORKER PANEL DATA                       *
*                                                                              *
********************************************************************************

// Directory
cd "H:\"

// Options
global step1 = 1	// 1) Obtain workers' characteristics
global step2 = 1	// 2) Link PS and LFS data to Baansommen and SPOLIS
global step3 = 1	// 3) Obtain hours worked and wages from Baansommen and SPOLIS
global step4 = 1	// 4) Obtain worker and firm characteristics
global step5 = 1	// 5) Obtain LFS characteristics
global step6 = 1	// 6) Merge to earnings data
global step7 = 1	// 7) Finalise data

////////////////////////////////////////////////////////////////////////////////
//////////////////////1) Obtain workers' characteristics////////////////////////
////////////////////////////////////////////////////////////////////////////////

if $step1 == 1 { 
	// Obtain workers' age, gender, as well as migrant status
	*forvalues year = 2004(1)2020 {
	forvalues year = 1996(1)2003 {
		if `year' < 2009 {
			import spss RINPERSOON GBAGEBOORTELAND GBAGESLACHT GBAGEBOORTELANDMOEDER GBAGEBOORTELANDVADER GBAGEBOORTEJAAR using "G:\Bevolking\GBAPERSOONTAB\\2009\GBAPERSOON2009TABV1.sav", case(lower) clear
		}
		if `year'>=2009 & `year' <= 2019 & `year' !=2016 & `year' != 2018 {
			import spss RINPERSOON GBAGEBOORTELAND GBAGESLACHT GBAGEBOORTELANDMOEDER GBAGEBOORTELANDVADER GBAGEBOORTEJAAR using "G:\Bevolking\GBAPERSOONTAB\\`year'\GBAPERSOON`year'TABV1.sav", case(lower) clear
		}
		if `year' == 2016  {
			import spss RINPERSOON GBAGEBOORTELAND GBAGESLACHT GBAGEBOORTELANDMOEDER GBAGEBOORTELANDVADER GBAGEBOORTEJAAR using "G:\Bevolking\GBAPERSOONTAB\\`year'\GBAPERSOONTAB`year'V1.sav", case(lower) clear
		}
		if `year' == 2018  {
			import spss RINPERSOON GBAGEBOORTELAND GBAGESLACHT GBAGEBOORTELANDMOEDER GBAGEBOORTELANDVADER GBAGEBOORTEJAAR using "G:\Bevolking\GBAPERSOONTAB\\`year'\GBAPERSOON`year'TABV2.sav", case(lower) clear
		}
		if `year' == 2020 {
			import spss RINPERSOON GBAGEBOORTELAND GBAGESLACHT GBAGEBOORTELANDMOEDER GBAGEBOORTELANDVADER GBAGEBOORTEJAAR using "G:\Bevolking\GBAPERSOONTAB\\`year'\GBAPERSOON`year'TABV3.sav", case(lower) clear
		}	
		g male = gbageslacht=="1"
		destring gbageboortejaar, force replace
		g age = `year'-gbageboortejaar
		g age1825 = age >= 18 & age <=25
		g age2635 = age >= 26 & age <=35
		g age3645 = age >= 36 & age <=45
		g age4655 = age >= 46 & age <=55
		g age5667 = age >= 56 & age <=67
		g migrant = gbageboorteland!="6030"
		g migrant_2ndgen = gbageboortelandmoeder!="6030" | gbageboortelandvader!="6030"
		keep rinpersoon age migrant migrant_2ndgen male
		g year = `year'
		duplicates drop rinpersoon, force
		compress
		save "H:\Robots and Workers\Data\temp_GBApersoon_`year'.dta", replace
	}
}


////////////////////////////////////////////////////////////////////////////////
////////////////2) Link PS and LFS data to Baansommen and SPOLIS////////////////
////////////////////////////////////////////////////////////////////////////////

if $step2 == 1 {
	// Obtain all rinpersoon from original file
	use "H:\Robots and Workers\Data\Workers_PS_LFS.dta", clear
	duplicates drop rinpersoon, force 
	keep rinpersoon
	save "H:\Robots and Workers\Data\temp_rinpersoon_Workers_PS_LFS.dta", replace
			
	// Open LFS files and merge with LFSnew
	use "H:\Robots and Workers\Data\LFSX_before.dta", clear
	keep rinpersoon lfsyear lfsquarter isco08 educ_soi3 educ_soi5

	merge 1:1 rinpersoon lfsyear using "H:\Robots and Workers\Data\LFSX_extnew.dta", keep(1 2 3 4 5) update keepusing(educ_soi5 educ_soi3 isco08_extnew) nogen
	*sort rinpersoon lfsyear lfsquarter

	duplicates drop rinpersoon, force
	save "H:\Robots and Workers\Data\LFS_before_temp.dta", replace 
												
	// Keep firms that are in manufacturing at least once between 2009-2019
	use "H:\Robots and Workers\Data\BEID_PS_20002020.dta", clear
	keep if year >= 2009
	collapse (max) manufacturing, by(beid)
	duplicates drop beid, force
	save "H:\Robots and Workers\Data\temp_BEID_uniquefirms.dta", replace

	// Open worker data from Baansommen and SPOLIS and keep rinpersoon of workers that are:
	//		1) In manufacturing
	//		2) In the LFS
	capture erase "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta"  // Start from scratch by 
	forvalues year = 2004(1)2020 {
		if `year' <= 2016 {
			use "H:\Robots and Workers\Data\Baansommen_`year'.dta", clear
			merge m:1 beid using "H:\Robots and Workers\Data\temp_BEID_uniquefirms.dta", nogen keep(3) keepusing(beid) // Keep only workers in manufacturing firms at least once between 2004-2019
			merge m:1 rinpersoon using "H:\Robots and Workers\Data\LFS_before_temp.dta", nogen keep(3 4 5) update keepusing(rinpersoon)
			keep rinpersoon
			capture append using "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta" // There is no file in the first year (i.e. 2004) so therefore 'capture'
			duplicates drop rinpersoon, force
			save "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta", replace 
		}
		if `year' >= 2010 {
			use "H:\Robots and Workers\Data\SPOLIS_`year'.dta", clear
			merge m:1 beid using "H:\Robots and Workers\Data\temp_BEID_uniquefirms.dta", nogen keep(3) keepusing(beid) // Keep only workers in manufacturing firms at least once between 2004-2019
			merge m:1 rinpersoon using "H:\Robots and Workers\Data\LFS_before_temp.dta", nogen keep(3 4 5) update keepusing(rinpersoon)
			keep rinpersoon
			capture append using "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta"
			duplicates drop rinpersoon, force
			save "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta", replace
		}
	}
	capture erase "H:\Robots and Workers\Data\temp_BEID_uniquefirms.dta"
	capture erase "H:\Robots and Workers\Data\LFS_before_temp.dta"
	
	append using "H:\Robots and Workers\Data\temp_rinpersoon_Workers_PS_LFS.dta"
	erase "H:\Robots and Workers\Data\temp_rinpersoon_Workers_PS_LFS.dta"
	duplicates drop rinpersoon, force
	
	// Create balanced worker panel
	clear all
	g year = .
	forvalues year = 2004(1)2020 {
		append using "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta"
		replace year = `year' if year == .
	}

	save "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta", replace
}

////////////////////////////////////////////////////////////////////////////////
/////////3) Obtain hours worked and wages from Baansommen and SPOLIS////////////
////////////////////////////////////////////////////////////////////////////////

if $step3 {
	local i = 1
	// Merge with Baansommen
	use "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta", clear
	forvalues year = 2004(1)2016 {
		nois _dots ` i' 0
		joinby rinpersoon year using "H:\Robots and Workers\Data\Baansommen_`year'.dta", unmatched(master) update
		drop _merge
		local i = `i'+1
	}
	rename (baanid hoursworked income_gross companycar hwage hwage_pctile) (bs_baanid bs_hoursworked bs_income_gross bs_companycar bs_hwage bs_hwage_pctile)
	drop contracthours flexcontract 
	save "H:\Robots and Workers\Data\temp_BS_Workers_PS_LFSmatch.dta", replace

	// Merge with SPOLIS
	use "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta", clear
	forvalues year = 2010(1)2020 {
		nois _dots ` i' 0
		joinby rinpersoon year using "H:\Robots and Workers\Data\SPOLIS_`year'.dta", unmatched(master) update
		drop _merge
		local i = `i'+1
	}
	rename (ikvid hoursworked income_beftax companycar hwage hwage_pctile) (sp_ikvid sp_hoursworked sp_income_gross sp_companycar sp_hwage sp_hwage_pctile)
	drop sect Δjobchange Δfirmchange contracthours income_gross
	save "H:\Robots and Workers\Data\temp_SP_Workers_PS_LFSmatch.dta", replace

	// Combine merged datasets from Baansommen and SPOLIS
	use "H:\Robots and Workers\Data\temp_BS_Workers_PS_LFSmatch.dta", clear
	append using "H:\Robots and Workers\Data\temp_SP_Workers_PS_LFSmatch.dta"
	erase "H:\Robots and Workers\Data\temp_BS_Workers_PS_LFSmatch.dta"
	erase "H:\Robots and Workers\Data\temp_SP_Workers_PS_LFSmatch.dta"

	// Calculate total number of variables for each rinpersoon-year-beid combination
	collapse (sum) bs_hoursworked bs_income_gross bs_companycar bs_hwage bs_hwage_pctile sp_hoursworked sp_income_gross sp_companycar sp_hwage sp_hwage_pctile (firstnm) bs_baanid sp_ikvid , by(rinpersoon year beid)

	// Drop empty cells without BEID match if worker is employed somewhere
	sort rinpersoon year beid
	drop if (rinpersoon[_n]==rinpersoon[_n-1]) & (year[_n]==year[_n-1]) & beid[_n]==. & bs_hwage==0 & bs_hoursworked ==0

	// Calculate weighted averages for each variable between Baansommen and SPOLIS
	foreach var in hoursworked income_gross hwage_pctile {
		g `var' = bs_`var' if (year < 2010 & bs_`var'>0) | (sp_`var'==0 & bs_`var'>0)
		replace `var' = (sp_`var'*1 + bs_`var'*7)/8 if `var'==. & sp_`var'>0 & bs_`var'>0 & year == 2010
		replace `var' = (sp_`var'*2 + bs_`var'*6)/8 if `var'==. & sp_`var'>0 & bs_`var'>0 & year == 2011
		replace `var' = (sp_`var'*3 + bs_`var'*5)/8 if `var'==. & sp_`var'>0 & bs_`var'>0 & year == 2012
		replace `var' = (sp_`var'*4 + bs_`var'*4)/8 if `var'==. & sp_`var'>0 & bs_`var'>0 & year == 2013
		replace `var' = (sp_`var'*5 + bs_`var'*3)/8 if `var'==. & sp_`var'>0 & bs_`var'>0 & year == 2014
		replace `var' = (sp_`var'*6 + bs_`var'*2)/8 if `var'==. & sp_`var'>0 & bs_`var'>0 & year == 2015
		replace `var' = (sp_`var'*7 + bs_`var'*1)/8 if `var'==. & sp_`var'>0 & bs_`var'>0 & year == 2016
		replace `var' = sp_`var' if `var'==. & ((year > 2016 & sp_`var'>0) | (sp_`var'>0 & bs_`var'==0))
		if "`var'" == "hoursworked" | "`var'" == "income_gross" {
			replace `var' = 0 if `var' == .
		}
		if "`var'" == "hwage_pctile" {
			// Round weighted wage percentiles to 10
			replace hwage_pctile = round( hwage_pctile,10)
		}
	}
	g companycar = max(bs_companycar, sp_companycar)
	g nonempl = hoursworked == 0
	replace beid = 0 if beid == .
	g hwage = income_gross/hoursworked
	order hwage, before(hwage_pctile)

	drop bs_* sp_*

	// Creates weights based on number of jobs and hours worked
	duplicates tag rinpersoon year, g(indweight)
	replace indweight = 1/(indweight+1)
	bysort rinpersoon year:egen hwweight = total(hoursworked)
	replace hwweight = hoursworked/hwweight
	replace hwweight = indweight if hwweight==.

	// Create indicator for excluding data in analyses (based on unrealisitically low/high wages/hours worked/hourly wage)
	bysort rinpersoon year: egen maxwincome_gross = max(income_gross)
	bysort rinpersoon year: egen minwincome_gross = min(income_gross)
	bysort rinpersoon year: egen maxwhoursworked = max(hoursworked)
	bysort rinpersoon year: egen maxwhwage = max(hwage)
	bysort rinpersoon year: egen minwhwage = min(hwage)
	g include = 1
	replace include = 0 if maxwincome_gross>500000 | minwincome_gross<0 | maxwhoursworked>4380 | (maxwhwage > 500 & maxwhwage!=.) | (minwhwage < 2.5 & minwhwage!=.)
	drop maxwincome_gross minwincome_gross maxwhoursworked maxwhwage minwhwage 
	

	
	save "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta", replace
}


////////////////////////////////////////////////////////////////////////////////
/////////////////////4) Obtain worker and firm characteristics//////////////////
////////////////////////////////////////////////////////////////////////////////

if $step4 == 1 {
	// Obtain aggregate sales	
	use "H:\Robots and Workers\Data\BEID_PS_20002019.dta", clear
	merge m:1 beid year using "H:\Robots and Workers\Data\ProductionStatistics.dta", nogen keep(1 3 4 5) keepusing(sbi93_5d sbi) update
	merge m:1 beid year using "H:\Robots and Workers\Data\ABR.dta", nogen keep(1 3 4 5) keepusing(beidstyear mfirmid mun sbi93 sbi) update
	merge m:1 sbi93 using "H:\Robots and Workers\Data\ABR_SBIcrosswalk.dta", nogen keep(1 3 4 5) keepusing(sbi) update
	capture drop sbi4
	g sbi4= substr(sbi,1,4)
	destring sbi4, force replace
	capture drop robotsales
	bysort sbi4 year: egen robotsales = total(robots*sales)
	bysort sbi4 year: egen totalsales = total(sales)
	keep sbi4 year robotsales totalsales
	duplicates drop sbi4 year, force
	save "H:\Robots and Workers\Data\temp_Competition.dta", replace
	
	// Match with personal characteristics
	use "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta", clear
	forvalues year = 2004(1)2020 {
		merge m:1 rinpersoon year using "H:\Robots and Workers\Data\temp_GBApersoon_`year'.dta", nogen keep(1 3 4 5) update
		//erase "H:\Robots and Workers\Data\temp_GBApersoon_`year'.dta"
	}
	replace include = 0 if age < 18 | age > 67
	* DROP OBSERVATIONS BASED ON AGE
	*drop if age < 18 | age > 67 
	g age1824 = age >= 18 & age <=24
	g age2534 = age >= 25 & age <=34
	g age3544 = age >= 35 & age <=44
	g age4554 = age >= 45 & age <=54
	g age5567 = age >= 55 & age < 67
	order age*, after(male)
		
	// Merge with firm data
	merge m:1 beid year using "H:\Robots and Workers\Data\BEID_PS_20002020.dta", nogen keep(1 3 4 5) keepusing(valueadded sales sbi93_5d sbi robots robots_r robotsfirm robots_mt sector mfirmid beidstyear mun manufacturing logvalueaddedpworker09 logworkers09)
	merge m:1 beid year using "H:\Robots and Workers\Data\ProductionStatistics.dta", nogen keep(1 3 4 5) keepusing(sbi93_5d sbi sector) update
	tostring sbi93_5d, g(sbi93)
	merge m:1 beid year using "H:\Robots and Workers\Data\ABR.dta", nogen keep(1 3 4 5) keepusing(beidstyear mfirmid mun sbi93 sbi) update
	merge m:1 sbi93 using "H:\Robots and Workers\Data\ABR_SBIcrosswalk.dta", nogen keep(1 3 4 5) keepusing(sbi) update
	drop sbi93 sbi93_5d
	replace sbi = "00000" if nonempl == 1 & sbi == ""
	replace beidstyear = year if nonempl == 1 & beidstyear==.
	replace mfirmid = 0 if nonempl == 1 & mfirmid==.
	replace mun = 0 if nonempl == 1 & mun == .
	capture g sbi2= substr(sbi,1,2)
	capture destring sbi2, force replace
	capture g sbi3= substr(sbi,1,3)
	capture destring sbi3, force replace
	capture g sbi4= substr(sbi,1,4)
	capture destring sbi4, force replace
		
	// Merge with competition data
	merge m:1 sbi4 year using "H:\Robots and Workers\Data\temp_Competition.dta", nogen keep(1 3 4 5) update 
	erase "H:\Robots and Workers\Data\temp_Competition.dta"
	replace robotsales = robotsales-sales if robots == 1
	replace totalsales = totalsales-sales 
	g compete = robotsales/totalsales
	replace compete = . if compete < 0
	replace compete = . if compete > 1
	replace compete = 0 if nonempl == 1 & compete == .
		
	save "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta", replace
	compress
	save "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch_step4.dta", replace
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////5) Obtain LFS characteristics///////////////////////////
////////////////////////////////////////////////////////////////////////////////

if $step5 == 1 {
	use "H:\Robots and Workers\Data\LFSX_before.dta", clear
	keep rinpersoon lfsyear lfsquarter isco08 educ_soi3 educ_soi5 rmun

	merge 1:1 rinpersoon lfsyear using "H:\Robots and Workers\Data\LFSX_extnew.dta", keep(1 2 3 4 5) update replace keepusing(educ_soi5 educ_soi3 isco08_extnew wmun) nogen
	sort rinpersoon lfsyear lfsquarter

	// Keep occupation and education BEFORE year of observations
	forvalues year = 2004(1)2020 {
		preserve
			keep if lfsyear <= `year'
			g year = `year'
			gsort -lfsyear -lfsquarter rinpersoon 
			// Only keep most recent observation
			duplicates drop rinpersoon, force 
			save "H:\Robots and Workers\Data\LFS_before_temp`year'.dta", replace
		restore
	}

	// Open worker data
	use "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta", clear
	forvalues year = 2004(1)2020 {
		merge m:1 rinpersoon year using "H:\Robots and Workers\Data\LFS_before_temp`year'.dta", nogen keep(1 3 4 5) update replace
		erase "H:\Robots and Workers\Data\LFS_before_temp`year'.dta"
	}

	g lfsyeardiff = year-lfsyear
	//drop if lfsyeardiff ==.
	replace isco08 = . if isco08 == 0 | isco08==9999
	replace educ_soi3 = . if educ_soi3 == 9
	replace educ_soi5 = . if educ_soi5 == 99

	replace mun = rmun if mun ==. & nonempl==1
	
	replace include = 0 if lfsyear < 2000
	
	// Obtain RTI data
	rename isco08 ISCO08Code
	forvalues year = 1996(1)2018 {
			merge m:1 ISCO08Code lfsyear using "H:\Robots and Workers\Base files\RTI\onet-ISCO08RTI_LFSX_`year'.dta", nogen keep(1 3 4 5) update keepusing(RTI* nr_cog_anal nr_cog_pers r_cog r_man nr_man_phys nr_man_pers nr_man_persInt)
	}
	rename (RTI RTIw) (rti rtiw)	
	rename ISCO08Code isco08
	drop rtiw
	
	merge m:1 isco08_extnew using "H:\Robots and workers\Base Files\bluecollar_new.dta", keep(1 3) nogen keepusing(bluecollar)
		rename bluecollar bluecollar_ext
	merge m:1 isco08 using "H:\Robots and workers\Base Files\bluecollar_new.dta", keep(1 3 4 5) nogen keepusing(bluecollar)
		replace bluecollar_ext = bluecollar if bluecollar_ext==.
		
	g lowskilled = educ_soi3 == 1 if educ_soi3!=.
	g lowmedskilled = educ_soi3 == 1 | educ_soi3 == 2 if educ_soi3!=.

	foreach var in nr_cog_anal nr_cog_pers r_cog r_man nr_man_phys nr_man_pers nr_man_persInt {
		capture drop `var'
	}
		
	// Generate indicator whether worker appears at least once in manufacturing sector post (and including) 2009
	g include_post2009 = manufacturing*(year>=2009)
	bysort rinpersoon: ereplace include_post2009 = max(include_post2009)
	replace include_post2009=0 if year < 2009
	save "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta", replace
}

////////////////////////////////////////////////////////////////////////////////
/////////////////////////////6) Finalise data///////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

if $step6 == 1 {
	use "H:\Robots and Workers\Data\Workers_PS_LFS_panel_full.dta", clear
	forvalues year = 2004(1)2020 {
		merge m:1 rinpersoon year using "H:\Robots and Workers\Data\Earnings_`year'.dta", nogen keep(1 3 4 5) update
	}

}


////////////////////////////////////////////////////////////////////////////////
/////////////////////////////7) Finalise data///////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

if $step7 == 1 {
    
	// Save tempfile as final file and order
	use "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta", clear
	
	foreach var in hoursworked income_gross hwage sales {
	    g log`var' = ln(`var')
		order log`var', after(`var')
		
	}
	
	order *weight, after(rinpersoon) 
	order include*, after(hwweight)
	order sbi*, after(sector)
	
	qui do "H:\Robots and Workers\Do-files\Variable labels.do"
	
	sort rinpersoon year beid
	compress
	save "H:\Robots and Workers\Data\Workers_PS_LFS_panel_full.dta", replace
	erase "H:\Robots and Workers\Data\temp_Workers_PS_LFSmatch.dta"

	
}



